# Do Party Leadership Contests Forecast British General Elections?
# Accepted for publication in Electoral Studies
# Andreas Murr
# Replication code
# 12.05.2021
# Requires: Leader2019.RData (dataset), R2jags (R package), arm (R package)

# clear working memory

	rm(list=ls())

# load leader data set

	load("Leader2019.RData")

# ===========
# = table 1 =
# ===========

	Leader[c("date.gen", "date.con", "ball.con", "name.ldr.con", "name.ctn.con", "vote.ldr.con", "vote.ctn.con", "pop.con")]
	Leader[c("date.gen", "date.lab", "ball.lab", "name.ldr.lab", "name.ctn.lab", "vote.ldr.lab", "vote.ctn.lab", "pop.lab")]

# ===========
# = table 2 =
# ===========

	Leader$pre = with(Leader, ifelse(pop.con>pop.lab, "CON", "LAB"))
	Leader$date.gen==Leader$date.gen.lag

	Leader$win[Leader$date.gen=="2019-12-12"] = "con"
	Leader[c("date.gen", "inc", "pop.con", "pop.lab", "pre", "win")]

# ===========
# = table 3 =
# ===========

  Leader$date.for.na = Leader$date.for
	Leader$date.for.na[Leader$pos==0] = NA
	Leader$lead = with(Leader, date.gen-date.for)
	Leader$lead[Leader$pos==0] = NA

	Leader[c("date.gen", "date.con", "date.lab", "date.for.na", "lead")]
	mean(Leader$lead, na.rm=TRUE)

# compute lead time of previous election and historical average for comparison
	
	Sel = Leader[!is.na(Leader$win)&!is.na(Leader$pre),c("date.gen", "date.gen.lag")]
	nrow(Sel)
	mean(with(Sel, date.gen - date.gen.lag))
	round(mean(with(Sel, as.numeric(date.gen - date.gen.lag)/365)), 1)
	round(mean(as.numeric(Leader$lead), na.rm=TRUE)/365, 1)

# ===========
# = table 4 =
# ===========

	Leader$better = with(Leader, ifelse(inc=="con", pop.con>pop.lab, pop.con<pop.lab))
	Leader$reelected = with(Leader, inc==win)
	Leader$maj = with(Leader, ifelse(seat.con > .5 | seat.lab > .5, 1, 0))

	y = matrix(NA, nrow=4, ncol=2)
	y[1,1] = with(Leader, sum(pos & !better &  reelected &  maj, na.rm=TRUE))
	y[2,1] = with(Leader, sum(pos & !better &  reelected & !maj, na.rm=TRUE))
	y[3,1] = with(Leader, sum(pos & !better & !reelected & !maj, na.rm=TRUE))
	y[4,1] = with(Leader, sum(pos & !better & !reelected &  maj, na.rm=TRUE))
	y[1,2] = with(Leader, sum(pos &  better &  reelected &  maj, na.rm=TRUE))
	y[2,2] = with(Leader, sum(pos &  better &  reelected & !maj, na.rm=TRUE))
	y[3,2] = with(Leader, sum(pos &  better & !reelected & !maj, na.rm=TRUE))
	y[4,2] = with(Leader, sum(pos &  better & !reelected &  maj, na.rm=TRUE))

	alpha0 = rep(.5, 4)
	alpha1 = rep(.5, 4)
	c0 = y[,1]
	c1 = y[,2]
	p0 = (alpha0 + c0) / sum(alpha0 + c0)
	p1 = (alpha1 + c1) / sum(alpha1 + c1)
	round(cbind(p0, p1)*100, 1)
	cbind(c0, c1)

# =====================
# = bayesian analysis =
# =====================

# load packages

	library(R2jags)

# write model

	modelString = "model {
	# uniform beta(1,1) prior on both
	  theta0 ~ ddirich(c(0.5+1, 0.5+0, 0.5+1, 0.5+2))  
	  theta1 ~ ddirich(c(0.5+6, 0.5+1, 0.5+1, 0.5+1))
	}"
	write(modelString, "model.txt")

	parameters = c("theta0", "theta1" ) # The parameter(s) to be monitored.
	adaptSteps = 1000000 # Number of steps to "tune" the samplers.
	nChains = 10 # Number of chains to run.
	nIter = 1000000 # Steps per chain.
	# Create, initialize, and adapt the model:
	jagsModel = jags.model("model.txt", n.chains = nChains, n.adapt = adaptSteps)
	# sample
	codaSamples = coda.samples(jagsModel, variable.names = parameters, n.iter = nIter)

# compute quantities of interest

	mcmcChain = as.matrix(codaSamples)

# does a more popular incumbent leader increase chance of re-election?

	p.win0 = mcmcChain[,"theta0[1]"] + mcmcChain[,"theta0[2]"]
	p.win1 = mcmcChain[,"theta1[1]"] + mcmcChain[,"theta1[2]"]
	q = p.win1 - p.win0
	prop.table(table(q > 0))

# does a more popular incumbent leader increase the chances of an outright majority?

	p.hun0 = mcmcChain[,"theta0[1]"]
	p.hun1 = mcmcChain[,"theta1[1]"]
	q = p.hun1 - p.hun0
	prop.table(table(q > 0))

# ====================================================
# = binary v. continous predictor of ordinal outcome =
# ====================================================
	
	Leader$pop.inc = with(Leader, ifelse(inc=="con", pop.con, pop.lab))
	Leader$pop.opp = with(Leader, ifelse(inc=="con", pop.lab, pop.con))

	Leader$gov.out = NA
	Leader$gov.out[with(Leader, reelected==FALSE & maj==1)] = 1
	Leader$gov.out[with(Leader, reelected==FALSE & maj==0)] = 2
	Leader$gov.out[with(Leader, reelected==TRUE & maj==0)] = 3
	Leader$gov.out[with(Leader, reelected==TRUE & maj==1)] = 4
	Leader$gov.out = factor(Leader$gov.out)

	library(MASS)
	
	data = Leader
	data = na.omit(data[,c("gov.out", "better", "pop.inc", "pop.opp")])
	err = matrix(NA, nrow=nrow(data), ncol=2)
	pro = matrix(NA, nrow=nrow(data), ncol=2)
	for (i in 1:nrow(data)){		
		# fit models
		m.bin = polr(gov.out ~ better, data=data[-i,])
		m.con = polr(gov.out ~ I(pop.inc-pop.opp), data=data[-i,])
		# predict outcome
		y.bin = predict(m.bin, data[i,], type="class")
		y.con = predict(m.con, data[i,], type="class")
		y = as.numeric(as.character(data$gov.out[i]))
		p.bin = predict(m.bin, data[i,], type="probs")[y]
		p.con = predict(m.con, data[i,], type="probs")[y]		
		# compute whether error occurs
		err[i,] = data$gov.out[i] != c(y.bin, y.con)
		pro[i,] = c(p.bin, p.con)
	}
	colnames(err) = c("bin", "con")
	colnames(pro) = c("bin", "con")
	err = data.frame(err)
	pro = data.frame(pro)
	round(apply(err, 2, mean)*100, 1)
	round(apply(pro, 2, mean)*100, 1)

# ============
# = table 5  =
# ============

# fit models

	data = na.omit(Leader[c("seat.inc", "seat.opp", "seat.inc.lag", "seat.opp.lag", "better")])
	# historical mean
	m.his.inc = lm(I(seat.inc*100) ~ 1, data=data)
	m.his.opp = lm(I(seat.opp*100) ~ 1, data=data)
	# persistence
	m.per.inc = lm(I(seat.inc*100) ~ offset(I(seat.inc.lag*100)) - 1, data=data)
	m.per.opp = lm(I(seat.opp*100) ~ offset(I(seat.opp.lag*100)) - 1, data=data)
	# autoregressive
	m.aut.inc = lm(I(seat.inc*100) ~ 1 + I(seat.inc.lag*100), data=data)
	m.aut.opp = lm(I(seat.opp*100) ~ 1 + I(seat.opp.lag*100), data=data)
	# party leadership model
	m.bet.inc = lm(I(seat.inc*100) ~ 1 + better, data=data)
	m.bet.opp = lm(I(seat.opp*100) ~ 1 + I(!better), data=data)

# display models
	
	library(arm)
	# incumbent
	display(m.his.inc, digits=1)
	display(m.per.inc, digits=1)
	display(m.aut.inc, digits=1)
	display(m.bet.inc, digits=1)
	# opposition
	display(m.his.opp, digits=1)
	display(m.per.opp, digits=1)
	display(m.aut.opp, digits=1)
	display(m.bet.opp, digits=1)

# evaluate models: rmse	

	loo.errors = function(x){
		e = x$residuals
		h = lm.influence(x)$hat
		sqrt(mean((e / (1 - h))^2))
	}

	# historical mean
	round(loo.errors(m.his.inc), 1)
	round(loo.errors(m.his.opp), 1)
	# persistence
	round(loo.errors(m.per.inc), 1)
	round(loo.errors(m.per.opp), 1)
	# autoregressive
	round(loo.errors(m.aut.inc), 1)
	round(loo.errors(m.aut.opp), 1)
	# party leadership model
	round(loo.errors(m.bet.inc), 1)
	round(loo.errors(m.bet.opp), 1)
	# relative reduction in errors
	1 - loo.errors(m.bet.inc) / min(c(loo.errors(m.his.inc), loo.errors(m.per.inc), loo.errors(m.aut.inc)))
	1 - loo.errors(m.bet.opp) / min(c(loo.errors(m.his.opp), loo.errors(m.per.opp), loo.errors(m.aut.opp)))

# check coverage of prediction interval

	# incumbent
	cov.his.inc = rep(NA, nrow(data))
	cov.per.inc = rep(NA, nrow(data))
	cov.aut.inc = rep(NA, nrow(data))
	cov.bet.inc = rep(NA, nrow(data))
	# opposition
	cov.his.opp = rep(NA, nrow(data))
	cov.per.opp = rep(NA, nrow(data))
	cov.aut.opp = rep(NA, nrow(data))
	cov.bet.opp = rep(NA, nrow(data))

	for (i in 1:nrow(data)){
		# fit models
		## historical mean
		m.his.inc.c = lm(I(seat.inc*100) ~ 1, data=data[-i,])
		m.his.opp.c = lm(I(seat.opp*100) ~ 1, data=data[-i,])
		## presistence
		m.per.inc.c = lm(I(seat.inc*100) ~ offset(I(seat.inc.lag*100)) - 1, data=data[-i,])
		m.per.opp.c = lm(I(seat.opp*100) ~ offset(I(seat.opp.lag*100)) - 1, data=data[-i,])
		## autoregressive
		m.aut.inc.c = lm(I(seat.inc*100) ~ 1 + I(seat.inc.lag*100), data=data[-i,])
		m.aut.opp.c = lm(I(seat.opp*100) ~ 1 + I(seat.opp.lag*100), data=data[-i,])
		## party leadership model
		m.bet.inc.c = lm(I(seat.inc*100) ~ 1 + better, data=data[-i,])
		m.bet.opp.c = lm(I(seat.opp*100) ~ 1 + I(!better), data=data[-i,])
		# make predictions
		int.his.inc = predict(m.his.inc.c, data[i,], interval="prediction")
		int.per.inc = predict(m.per.inc.c, data[i,], interval="prediction")[1,]
		int.aut.inc = predict(m.aut.inc.c, data[i,], interval="prediction")
		int.bet.inc = predict(m.bet.inc.c, data[i,], interval="prediction")
		int.his.opp = predict(m.his.opp.c, data[i,], interval="prediction")
		int.per.opp = predict(m.per.opp.c, data[i,], interval="prediction")[1,]
		int.aut.opp = predict(m.aut.opp.c, data[i,], interval="prediction")
		int.bet.opp = predict(m.bet.opp.c, data[i,], interval="prediction")
		# assess coverage
		cov.his.inc[i] = (data$seat.inc[i]*100 >= int.his.inc[2]) & (data$seat.inc[i]*100 <= int.his.inc[3])
		cov.per.inc[i] = (data$seat.inc[i]*100 >= int.per.inc[2]) & (data$seat.inc[i]*100 <= int.per.inc[3])
		cov.aut.inc[i] = (data$seat.inc[i]*100 >= int.aut.inc[2]) & (data$seat.inc[i]*100 <= int.aut.inc[3])
		cov.bet.inc[i] = (data$seat.inc[i]*100 >= int.bet.inc[2]) & (data$seat.inc[i]*100 <= int.bet.inc[3])
		cov.his.opp[i] = (data$seat.opp[i]*100 >= int.his.opp[2]) & (data$seat.opp[i]*100 <= int.his.opp[3])
		cov.per.opp[i] = (data$seat.opp[i]*100 >= int.per.opp[2]) & (data$seat.opp[i]*100 <= int.per.opp[3])
		cov.aut.opp[i] = (data$seat.opp[i]*100 >= int.aut.opp[2]) & (data$seat.opp[i]*100 <= int.aut.opp[3])
		cov.bet.opp[i] = (data$seat.opp[i]*100 >= int.bet.opp[2]) & (data$seat.opp[i]*100 <= int.bet.opp[3])
	}

	round(mean(cov.his.inc), 2)
	round(mean(cov.per.inc), 2)
	round(mean(cov.aut.inc), 2)
	round(mean(cov.bet.inc), 2)
	round(mean(cov.his.opp), 2)
	round(mean(cov.per.opp), 2)
	round(mean(cov.aut.opp), 2)
	round(mean(cov.bet.opp), 2)

# make prediction for 2019

	Pre = Leader[Leader$date.gen==as.Date("2019-12-12"),]
	# incumbent
	int.his.inc = predict(m.his.inc, Pre, interval="prediction")
	int.per.inc = predict(m.per.inc, Pre, interval="prediction")
	int.aut.inc = predict(m.aut.inc, Pre, interval="prediction")
	int.bet.inc = predict(m.bet.inc, Pre, interval="prediction")
	round(int.his.inc, 1)
	round(int.per.inc, 1)[1,]
	round(int.aut.inc, 1)
	round(int.bet.inc, 1)
	round((int.his.inc/100)*650, 0)
	round((int.per.inc/100)*650, 0)[1,]
	round((int.aut.inc/100)*650, 0)
	round((int.bet.inc/100)*650, 0)
	round(int.his.inc[,3] - int.his.inc[,2], 1)
	round(int.per.inc[,3] - int.per.inc[,2], 1)
	round(int.aut.inc[,3] - int.aut.inc[,2], 1)
	round(int.bet.inc[,3] - int.bet.inc[,2], 1)
	# opposition
	int.his.opp = predict(m.his.opp, Pre, interval="prediction")
	int.per.opp = predict(m.per.opp, Pre, interval="prediction")
	int.aut.opp = predict(m.aut.opp, Pre, interval="prediction")
	int.bet.opp = predict(m.bet.opp, Pre, interval="prediction")
	round(int.his.opp, 1)
	round(int.per.opp, 1)[1,]
	round(int.aut.opp, 1)
	round(int.bet.opp, 1)
	round((int.his.opp/100)*650, 0)
	round((int.per.opp/100)*650, 0)[1,]
	round((int.aut.opp/100)*650, 0)
	round((int.bet.opp/100)*650, 0)
	round(int.his.opp[,3] - int.his.opp[,2], 1)
	round(int.per.opp[,3] - int.per.opp[,2], 1)
	round(int.aut.opp[,3] - int.aut.opp[,2], 1)
	round(int.bet.opp[,3] - int.bet.opp[,2], 1)
		
# ==============================
# = alternative specifications =
# ==============================

	data = na.omit(Leader[c("seat.inc", "seat.opp", "seat.inc.lag", "seat.opp.lag", "better", "pop.inc", "pop.opp")])
	# party leadership model and lagged seat share lag
	m.pla.inc = lm(I(seat.inc*100) ~ 1 + I(seat.inc.lag*100) + better, data=data)
	m.pla.opp = lm(I(seat.opp*100) ~ 1 + I(seat.opp.lag*100) + I(!better), data=data)
	# leader popularity
	m.lea.inc = lm(I(seat.inc*100) ~ pop.inc, data=data)
	m.lea.opp = lm(I(seat.opp*100) ~ pop.opp, data=data)
	# difference in leader popularity
	m.dif.inc = lm(I(seat.inc*100) ~ I(pop.inc-pop.opp), data=data)
	m.dif.opp = lm(I(seat.opp*100) ~ I(pop.opp-pop.inc), data=data)

	# party leadership model and lagged seat share
	round(loo.errors(m.pla.inc), 1)
	round(loo.errors(m.pla.opp), 1)
	# leader popularity
	round(loo.errors(m.lea.inc), 1)
	round(loo.errors(m.lea.opp), 1)
	# difference in leader popularity
	round(loo.errors(m.dif.inc), 1)
	round(loo.errors(m.dif.opp), 1)

# ===============
# = post-mortem =
# ===============

	shr.inc = (365/650)*100
	shr.opp = (202/650)*100

	round(sqrt((shr.inc - int.his.inc[,1])^2), 1)
	round(sqrt((shr.inc - int.per.inc[,1])^2), 1)[1]
	round(sqrt((shr.inc - int.aut.inc[,1])^2), 1)
	round(sqrt((shr.inc - int.bet.inc[,1])^2), 1)

	round(sqrt((shr.opp - int.his.opp[,1])^2), 1)
	round(sqrt((shr.opp - int.per.opp[,1])^2), 1)[1]
	round(sqrt((shr.opp - int.aut.opp[,1])^2), 1)
	round(sqrt((shr.opp - int.bet.opp[,1])^2), 1)
	
# ===================
# = end source code =
# ===================